import pandas as pd
import os
class dataloader:
    """Keep the rows of a source Excel sheet whose key also occurs in a standard sheet.

    Both workbooks are read once at construction time. ``filter`` then builds
    a key from selected columns of each sheet and keeps only the source rows
    whose key is present among the standard sheet's keys.
    """

    def __init__(self, sourcepath, standardpath="") -> None:
        """Read both workbooks into memory.

        Args:
            sourcepath: Path of the source Excel file.
            standardpath: Path of the Excel file to compare against.

        Raises:
            FileNotFoundError: If either path does not exist on disk.
        """
        # Validate before reading so a wrong path yields a clear message
        # instead of an error raised deep inside pandas.
        self._require_existing(sourcepath, "源excel文件路径不对")
        self._require_existing(standardpath, "标准excel文件路径不对")
        self.sourcePath = sourcepath
        self.sourceData = pd.read_excel(sourcepath)
        self.sourceColumns = self.sourceData.columns
        self.standardPath = standardpath
        self.standardData = pd.read_excel(standardpath)
        self.standardColnums = self.standardData.columns

    @staticmethod
    def _require_existing(path, message):
        """Print ``message`` and raise if ``path`` is a missing filesystem path."""
        # Only path-like values are checked; other inputs are left to pandas.
        if isinstance(path, (str, os.PathLike)) and not os.path.exists(path):
            print(message)
            raise FileNotFoundError("路径不存在")

    @staticmethod
    def _row_keys(frame, cols):
        """Return one tuple of stringified cell values per row of ``frame``.

        Each entry of ``cols`` is treated as a column label when it is one;
        otherwise an ``int`` entry is treated as a 0-based column position.
        """
        selected = []
        for col in cols:
            if col in frame.columns or not isinstance(col, int):
                # Label lookup; an unknown label raises KeyError.
                selected.append(frame[col])
            else:
                selected.append(frame.iloc[:, col])
        if not selected:
            # No key columns: every row shares the same (empty) key.
            return [()] * len(frame)
        # Tuples keep the cells separate, so ("ab", "c") and ("a", "bc")
        # are different keys (plain concatenation would make them collide).
        return [tuple(str(value) for value in row) for row in zip(*selected)]

    def filter(self, sourceCols=(0, 1), standardCols=(1, 2), out_path="./result.xlsx"):
        """Keep the source rows whose key appears in the standard sheet.

        Cell values are compared by their ``str()`` form, column by column.

        Args:
            sourceCols: Columns of the source sheet that form the key
                (0-based positions or column labels).
            standardCols: Columns of the standard sheet that form the key
                (0-based positions or column labels).
            out_path: Where to write the filtered rows as an Excel file.
                Pass ``None`` to skip writing.

        Returns:
            A DataFrame holding the matching source rows, in their original
            order and with their original index labels.
        """
        source_keys = self._row_keys(self.sourceData, sourceCols)
        # A set gives O(1) membership tests instead of scanning all keys per row.
        standard_keys = set(self._row_keys(self.standardData, standardCols))

        kept_positions = [
            pos for pos, key in enumerate(source_keys) if key in standard_keys
        ]
        resDf = self.sourceData.iloc[kept_positions]

        if out_path is not None:
            resDf.to_excel(out_path, index=False)
        return resDf
def test():
    """Filter the sample workbooks and write the matching rows to ./example.xlsx."""
    loader = dataloader(sourcepath="./1067.xlsx", standardpath="./1066.xlsx")
    # filter() writes the Excel output itself, so point it at the example
    # file rather than chaining .to_excel on its return value.
    loader.filter(out_path="./example.xlsx")
# # Data comparison (example usage)
# loader = dataloader(sourcepath="./1067.xlsx"
#             ,standardpath="./1066.xlsx")
# resdf = loader.filter(sourceCols = [0,1],standardCols = [0,3],out_path = "./result对比结果.xlsx")
        
            
